JsonToTable.php

<?php


class JSONToTable {

    protected $sourceFile;
    protected $outDir;
    protected $pdo;
    
    protected $tempSqliteFile;
    protected $infoGenFile;
    protected $schemaGenFile;
    protected $schemaStaticFile;
    protected $sqlGenFile;

    public function __construct($sourceFile, $outputDir, $pdo=null){
        if (!is_dir($outputDir)){
            throw new \Exception("Directory doesn't exist.");
        }
        $this->sourceFile = $sourceFile;
        $this->outDir = $outputDir;
        $tempSqliteFile = $outputDir.'/placeholder-sqlite-file-'.uniqid().'.sql3';
        $this->tempSqliteFile = $tempSqliteFile;
        $this->pdo = $pdo;
        
        $this->infoGenFile = $this->outDir.'/schema-info-gen.json';
        $this->schemaGenFile = $this->outDir.'/schema-gen.json';
        $this->schemaStaticFile = $this->outDir.'/schema.json';
        if (!is_file($this->schemaStaticFile)){
            $this->schemaStaticFile = dirname($this->outDir).'/schema.json';
        }
        $this->sqlGenFile = $this->outDir.'/sql-gen';
    }
    
    protected function lineToRow($line){
        if (trim($line)=='[') return false;
        if (trim($line)==']')return false;
        $line = substr($line,0,strrpos($line,'}')+1);
        $row = json_decode($line, true);
        return $row;
    }
    
    public function generateSchema($generateIfStaticPresent=true){
        
        if (!$generateIfStaticPresent
            &&(file_exists($this->schemaStaticFile)
            ||file_exists($this->schemaGenFile))){
            return;
        }
        
        $schemaInfo = $this->getSchemaInfo();
        $info = json_encode($schemaInfo, JSON_PRETTY_PRINT);
        file_put_contents($this->infoGenFile, $info);
        
        $schema = [];
        
        foreach ($schemaInfo as $col => $data){
            $maxLen = $data['maxlen'];
            if ($maxLen > 1000){
                $type = "TEXT";
            } else if ($data['is_date']>0){
                $type = 'DATE';
            } else if ($data['is_string']>0
                &&$data['is_string']>$data['is_numeric']
                || $data['is_array'] > 0 
                && $data['is_array'] > $data['is_numeric']
            ) {
                $len = $maxLen + 50;
                $type = "VARCHAR({$len})"; //or varchar
            } else if ($data['is_float']>0 
                && $data['is_float'] > $data['is_int']
            ){
                $type  = "FLOAT";
            } else if ($data['is_bool']>0){
                $type = "BOOL";
            } else {
                $len = $maxLen;
                if ($len < 10)$len = 10;
                $type = "int({$len})";
            }
            
            $schema[$col]  = [
                'name'=> $col,
                'type'=> $type,
            ];
        }
        
        $schemaJson = json_encode($schema, JSON_PRETTY_PRINT);
        file_put_contents($this->schemaGenFile, $schemaJson);
    }
    
    protected function getSchemaInfo(){
        $schemaData = [];
        $sampleData = [
            'maxlen'=> 0,
            'minlen'=> false,
            'count' => 0,
            'is_string' => 0,
            'is_numeric' => 0,
            'is_int' => 0,
            'is_float'=> 0,
            'is_bool' => 0,
            'is_array'=> 0,

            'is_date' => 0,
            'is_json' => 0,
            
            'sampleValue' => '',
        ];
        
        //loop over every row
        //modify keys of schemaDataArray
        // finally json_encode & output the schema data to an info file <- For the developer to inspect their input data
        // json_encode & output an actual schema-file that details how to create the table.
        
        $reader = fopen($this->sourceFile, 'r');
        
        while ($line = fgets($reader)) {
            $row = $this->lineToRow($line);
            if ($row==false)continue;
            foreach ($row as $column => $value){
                $data = $schemaData[$column] ?? $sampleData;
                $data['count']++;
                $valueForLen = is_array($value) ? json_encode($value) : $value;
                $len = strlen($valueForLen);
                
                if ($len===false || $data['maxlen'] > $len){
                    $data['maxlen'] = $data['maxlen'];
                } else {
                    $data['maxlen'] = $len;
                    $data['sampleValue'] = substr($valueForLen, 0, 1000);
                }
                $data['minlen'] = ($len == false || $data['minlen'] < $len) && $data['minlen'] !== false ? $data['minlen'] : $len;
                
                $data['is_string'] += is_string($value) ? 1 : 0;
                $data['is_bool'] += is_bool($value) ? 1 : 0;
                $data['is_array'] += is_array($value) ? 1 : 0;
                $data['is_date'] += $this->is_date($value) ? 1 : 0;

                if (is_numeric($value)){
                    $data['is_numeric'] += 1;
                    $data['is_int'] += is_int($value) || (int)$value==$value ? 1 : 0;
                    $data['is_float'] += is_float($value) || (float)$value==$value ? 1 : 0;
                }
                
                if (is_string($value)){
                    $json = json_decode($value, true);
                    $data['is_json'] += ($json===null) ? 0 : 1;
                }
                
                $schemaData[$column] = $data;
            }
        }
        
        fclose($reader);
        
        return $schemaData;
    }

    
    public function generateSql($tableName,$dropIfExists = true, $chunkByLength=false){
        $schemaFile = file_exists($this->schemaStaticFile) ? $this->schemaStaticFile
                        : (file_exists($this->schemaGenFile) ? $this->schemaGenFile : null);
        if ($schemaFile==null){
            $this->generateSchema();
            $schemaFile = $this->schemaGenFile;
        }
        $schemaJson = json_decode(file_get_contents($schemaFile), true);
        if ($schemaJson==null)throw new \Exception ("Schema File '{$schemaFile}' does NOT have valid json");
        
        $this->generateSqlCreate($tableName, $schemaJson, $dropIfExists);
        $this->generateSqlInsert($tableName, $schemaJson, $chunkByLength);
    }
    
    public function generateSqlCreate($tableName, $schemaJson, $dropIfExists){
        $colStatements = [];
        foreach ($schemaJson as $col => $info){
            $statement = '`'.$info['name'].'` '.$info['type'];
            $colStatements[] = $statement;
        }
        $colsSql = implode(", ", $colStatements);
        $drop = $dropIfExists ? "DROP TABLE IF EXISTS `{$tableName}`;\n" : '';
        $sql =
        <<<SQL
            {$drop}
            CREATE TABLE `{$tableName}`
            (
            {$colsSql}
            )
            ;
            
        SQL;
            
        file_put_contents($this->sqlGenFile.'-create.sql',$sql);
    }

    public function generateSqlInsert($tableName, $schemaJson, $chunkByLength){
        $pdo = $this->pdo;
        if ($pdo==null)$pdo = new PDO('sqlite:'.$this->tempSqliteFile);
         
        $reader = fopen($this->sourceFile, 'r');
        $cols = array_combine(array_keys($schemaJson), array_keys($schemaJson));
        $colsSql = '`'.implode('`,`', $cols).'`';
        $insert = 
        <<<SQL
            INSERT INTO `{$tableName}` 
                ( ${colsSql} )
            VALUES
        SQL;
        
        //goto here
        $rowCount = 0;
        $fileIndex = 0;
        $writer = null;
        $rows = [];
        while ($line = fgets($reader)) {
            $row = $this->lineToRow($line);
            if ($row==false)continue;
            $rowCount++;
            $row = array_filter($row,
                function($value, $key) use ($cols){
                    return isset($cols[$key]);
                }, ARRAY_FILTER_USE_BOTH
            );
            $row = array_map(
                function($value) use ($pdo){
                    if (is_array($value)){
                        $value = json_encode($value);
                    }
                    if (is_string($value))$value = $pdo->quote($value); 
                    else if (strlen($value)==0)$value = 'NULL';
//                     $value = '"'.substr($value,1,-1).'"';
//                     return "{$value}";// Why am I returning it like that?
                    return $value;
                }, $row
            );
            $data = [];
            foreach ($cols as $name){
                $data[] = $row[$name] ?? '0';
            }
            // $leadComma = $rowCount===1 ? '' : ',';
            $rowSql = "\n".'( '. implode(', ',$data).')';

            // echo "\n$rowCount";

            if ($writer === null || $chunkByLength!==false && $len + strlen($rowSql) > $chunkByLength){
                // finish what we have. This line gets written to next file
                // file name increments by 1
                if ($writer!==null){
                    fwrite($writer, "\n;\n");
                    fclose($writer);
                }
                $writer = fopen($this->sqlGenFile."-insert-{$fileIndex}.sql", 'w');
                fwrite($writer, $insert);
                $fileIndex++;
                $rowCount = 1;
                $len = strlen($insert);
            }
            if ($rowCount!==1){
                $rowSql = ",".$rowSql;
            }
            
            fwrite($writer,$rowSql);
            $len += strlen($rowSql);
            
        }
        fwrite($writer, "\n;\n");
        fclose($writer);
        fclose($reader);
        
        unlink($this->tempSqliteFile);
    }

    public function is_date($value){
        if (!is_string($value))return false;
        return preg_match('/[0-9]{4}\-[0-9]{2}\-[0-9]{2}/', $value);
    }

}